/******************************************************************************* 
This .do file brings in data on industry structure. 

Note 1: ind_other refers to other manufacturing industry besides ind_metals, 
	so that ind_mnfg is the sum of ind_metals + ind_other 

Note 2: ind_total excludes ind_agro

*******************************************************************************/ 
clear all
set more off

*===============================================================================
* Eurostat Yearbooks
*===============================================================================
* Load the yearbook workbook, remove the rows that are not used from this
* source, and park the cleaned table in a tempfile for the steps that follow.
cd "$insheet_files/Eurostat Regional Yearbook/"
import excel using "EYB_IND_1974.xlsx", firstrow clear
notes _dta: "Data is from 1973 reported in 1973-74 Eurostat regional yearbook, countries: Germany, France, Belgium, Netherlands and Italy."

* Two- and three-character prefixes of the region code
generate country = substr(nuts, 1, 2)
generate nuts1   = substr(nuts, 1, 3)

* FR82 and FR83 carry the exact same industry values. Based on census data
* the figures appear to belong to FR82 only, so FR83 is removed.
drop if nuts == "FR83"

* Rows coded IE or LU0, and every row whose country prefix is DK
drop if inlist(nuts, "IE", "LU0") | country == "DK"

* UK rows: the bare "UK" code plus the three-character prefixes listed below.
* nuts1 holds at most three characters, so only codes of that length can match.
* NOTE(review): UKI, UKM and UKN are not in this list -- confirm that is intended.
drop if inlist(nuts1, "UK", "UKC", "UKD", "UKE", "UKF") ///
      | inlist(nuts1, "UKG", "UKH", "UKJ", "UKK", "UKL")

* NOTE(review): the tempfile macro is dereferenced with a .dta suffix inside
* the macro quotes, here and in the rest of the file; the Stata manual shows
* the bare macro name. Confirm that this resolves to the temporary path.
tempfile EYB_full
save `EYB_full.dta'

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Separate regions BE24 and BE31 from Brabant (BE1&BE24&BE31).
* Each one is computed as a residual: the BE2 (resp. BE3) row minus the sum
* of the four other listed sub-region rows.

keep if country == "BE"

* 1 x 4 row of ones; pre-multiplying a four-row matrix by it gives column sums
matrix l = J(1, 4, 1)

* BE24 = BE2 - (BE21 + BE22 + BE23 + BE25)
mkmat ind_* serv_* if nuts == "BE2", matrix(BE2)
mkmat ind_* serv_* if inlist(nuts, "BE21", "BE22", "BE23", "BE25"), matrix(sumBE2)
matrix sumBE2 = l * sumBE2
matrix BE24   = BE2 - sumBE2

* BE31 = BE3 - (BE32 + BE33 + BE34 + BE35)
mkmat ind_* serv_* if nuts == "BE3", matrix(BE3)
mkmat ind_* serv_* if inlist(nuts, "BE32", "BE33", "BE34", "BE35"), matrix(sumBE3)
matrix sumBE3 = l * sumBE3
matrix BE31   = BE3 - sumBE3

* Turn each residual row back into a one-observation dataset
clear
svmat BE24, names(sect)
generate nuts = "BE24"

tempfile BE24
save `BE24.dta'

clear
svmat BE31, names(sect)
generate nuts = "BE31"

append using `BE24.dta'

* svmat numbered the columns sect1-sect13 in matrix column order; give them
* back their sector names.
* NOTE(review): assumes the ind_ and serv_ columns sit in exactly this order
* in the workbook -- confirm against the sheet.
local sectors ind_agro ind_energy ind_mining ind_metals ind_other ///
	ind_construction ind_total serv_commerce serv_transport ///
	serv_credit serv_admin serv_other serv_total
forvalues j = 1/13 {
	local newname : word `j' of `sectors'
	rename sect`j' `newname'
}
generate country = substr(nuts, 1, 2)

tempfile BE_24_31
save `BE_24_31.dta'

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Add duplicates with expanded nuts codes (e.g. add DE30 to DE3).
* The five rows are copied under a code with a trailing "0"; the originals
* stay in the full yearbook table, which is stacked back in later.

use `EYB_full.dta', clear

keep if inlist(nuts, "DE3", "DE5", "DE6", "DEC", "DEF")

* Append "0" to each of the retained three-character codes
foreach code in DE3 DE5 DE6 DEC DEF {
	replace nuts = "`code'0" if nuts == "`code'"
}

tempfile DE
save `DE.dta'

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Aggregate sub-regions of NL and IT up to regular NUTS1 regions, then stack
* the aggregates with the full yearbook table, the expanded German codes and
* the two Belgian residual regions.
use `EYB_full.dta', clear

* Parent code for each sub-region; rows left blank are not aggregated
generate NUTS1 = ""

* NL3 is the sum of NL31, NL32, NL33 and NL34. NL32 must be listed explicitly:
* repeating NL31 in its place silently leaves NL32 out of the NL3 total.
replace NUTS1 = "NL3" if inlist(nuts, "NL31", "NL32", "NL33", "NL34")
replace NUTS1 = "ITC" if inlist(nuts, "ITC1", "ITC2", "ITC3", "ITC4")
replace NUTS1 = "ITF" if inlist(nuts, "ITF1", "ITF2", "ITF3", "ITF4", "ITF5", "ITF6")
* "ITH1&ITH2" is compared as one literal code, not as two separate codes
replace NUTS1 = "ITH" if inlist(nuts, "ITH1&ITH2", "ITH3", "ITH4", "ITH5")
replace NUTS1 = "ITI" if inlist(nuts, "ITI1", "ITI2", "ITI3", "ITI4")
replace NUTS1 = "ITG" if inlist(nuts, "ITG1", "ITG2")
drop if NUTS1 == ""

* Sum every sector column within the parent region.
* NOTE(review): (first) keeps the region_name of one of the sub-regions, not
* a name for the aggregate -- confirm that this is acceptable downstream.
collapse (sum) ind_* serv_* (first) region_name, by(NUTS1)

rename NUTS1 nuts

append using `EYB_full.dta'
append using `DE.dta'
append using `BE_24_31.dta'

tempfile EYB_IND
save `EYB_IND.dta'

*===============================================================================
* Spain
*===============================================================================
* Build totals at successively coarser levels by truncating the region code
* and summing, then stack every level into one table.
cd "$insheet_files/Spain"
tempfile ES_nuts3 ES_nuts2 ES_nuts1 ES_IND

* Rows exactly as delivered in the workbook
* (presumably NUTS3 codes, going by the tempfile name -- confirm)
import excel using "ES_IND_1970.xlsx", firstrow clear
save `ES_nuts3.dta'

* Totals by the first four characters of the code
generate nuts2 = substr(nuts, 1, 4)
collapse (sum) ind* serv*, by(nuts2)
rename nuts2 nuts
save `ES_nuts2.dta'

* Totals by the first three characters of the code
generate nuts1 = substr(nuts, 1, 3)
collapse (sum) ind* serv*, by(nuts1)
rename nuts1 nuts
save `ES_nuts1.dta'

* Single country-wide total
collapse (sum) ind* serv*
generate nuts = "ES"

* Stack the levels beneath the country row, coarsest first
append using `ES_nuts1.dta'
append using `ES_nuts2.dta'
append using `ES_nuts3.dta'

save `ES_IND.dta'

*===============================================================================
* Denmark 1971 Census
*===============================================================================
* Sum the workbook rows by region code, then build coarser totals by
* truncating the code, and stack all levels into one table.

* Plain cd, as in the other country sections: if the folder is missing the
* script should stop here rather than try to import from whatever directory
* the previous section left as the working directory.
cd "$insheet_files/Denmark"
import excel using "DK_IND_1971.xlsx", clear first

* One row per nuts code (rows sharing a code are summed)
collapse (sum) ind* serv*, by(nuts)

tempfile DK_nuts2
save `DK_nuts2.dta'

* Totals by the first three characters of the code
gen nuts1 = substr(nuts, 1,3)

collapse (sum) ind* serv*, by(nuts1)

rename nuts1 nuts

tempfile DK_nuts1
save `DK_nuts1.dta'

* Single country-wide total
collapse (sum) ind* serv*

gen nuts = "DK"

* Stack the levels beneath the country row, coarsest first
append using `DK_nuts1.dta'
append using `DK_nuts2.dta'

tempfile DK_IND
save 	`DK_IND.dta'

*===============================================================================
* Sweden
*===============================================================================
* Build totals at successively coarser levels by truncating the region code
* and summing, then stack every level into one table.

* Plain cd, as in the other country sections: if the folder is missing the
* script should stop here rather than try to import from whatever directory
* the previous section left as the working directory.
cd "$insheet_files/Sweden"
import excel "SE_IND_1970.xlsx", clear first

* Rows exactly as delivered in the workbook
* (presumably NUTS3 codes, going by the tempfile name -- confirm)
tempfile SE_nuts3
save 	`SE_nuts3.dta'

* Totals by the first four characters of the code
gen nuts2 = substr(nuts,1,4) 

collapse (sum) ind* serv*, by(nuts2)

rename nuts2 nuts
tempfile SE_nuts2
save `SE_nuts2.dta'

* Totals by the first three characters of the code
gen nuts1 = substr(nuts,1,3) 

collapse (sum) ind* serv*, by(nuts1)

rename nuts1 nuts
tempfile SE_nuts1
save `SE_nuts1.dta'

* Country-wide total, keyed on the two-character prefix
gen country=substr(nuts,1,2)

collapse (sum) ind* serv*, by(country)

rename country nuts

* Stack the levels beneath the country row, coarsest first
append using `SE_nuts1.dta'
append using `SE_nuts2.dta'
append using `SE_nuts3.dta'

tempfile SE_IND
save	`SE_IND.dta'

*===============================================================================
* Finland
*===============================================================================
* Build totals at successively coarser levels by truncating the region code
* and summing, then stack the levels. The workbook rows themselves are not
* kept; the finest level stored is the four-character one.
cd "$insheet_files/Finland"
tempfile FI_nuts2 FI_nuts1 FI_IND

import excel using "FI_IND_1970.xlsx", firstrow clear

* Totals by the first four characters of the code
generate nuts2 = substr(nuts, 1, 4)
collapse (sum) ind* serv*, by(nuts2)
rename nuts2 nuts
save `FI_nuts2.dta'

* Totals by the first three characters of the code
generate nuts1 = substr(nuts, 1, 3)
collapse (sum) ind* serv*, by(nuts1)
rename nuts1 nuts
save `FI_nuts1.dta'

* Country-wide total, keyed on the two-character prefix
generate country = substr(nuts, 1, 2)
collapse (sum) ind* serv*, by(country)
rename country nuts

* Stack the levels beneath the country row, coarsest first
append using `FI_nuts1.dta'
append using `FI_nuts2.dta'

save `FI_IND.dta'

*===============================================================================
* AT 1971 Census
*===============================================================================
* The workbook is stored as-is; no aggregation is done in this section.
cd "$insheet_files/Austria"
tempfile AT_IND

import excel using "AT_IND_1971.xlsx", firstrow clear
save `AT_IND.dta'

*===============================================================================
* CH 1970 Census
*===============================================================================
cd "$insheet_files/Switzerland"
tempfile CH_IND

import excel using "CH_IND_1970.xlsx", firstrow clear

* Fold the chemical column into mining, then remove it so that the column
* set no longer contains ind_chemical.
* NOTE(review): a missing value in either column makes the sum missing --
* confirm neither column has gaps.
replace ind_mining = ind_mining + ind_chemical
drop ind_chemical

save `CH_IND.dta'

*===============================================================================
* Merge all files
*===============================================================================
* Start from the Eurostat table and stack each national table beneath it,
* in the order AT, CH, DK, ES, FI, SE.

use `EYB_IND.dta', clear

foreach src in AT CH DK ES FI SE {
	append using ``src'_IND.dta'
}

* Remove the country column, order rows by region code, and write the result
drop country
sort nuts
save "$dta_files/IC_EU_IND", replace

